
***********************************************************
* Internal migration and crime in Brazil *
* Author: Eva-Maria Egger 

* Contact: egger@wider.unu.edu
***********************************************************

* This do-file imports the data of the Demographic Census 2010 and creates new variables.
	
***********************************************************
** Set globals for directories

* NOTE(review): the three globals below are defined without a value, so they
* expand to empty strings. Presumably the directory paths were removed before
* the file was shared - fill them in before running; confirm with the author.
global tables
global graphs 
global data

*set directory
* NOTE(review): no path is given to -cd- here. All relative paths used further
* down in this file depend on the working directory, so supply the project
* folder before running.

cd 

* Settings
* Switch off the -more- pause so that long output does not halt execution;
* the "permanently" option also keeps the setting for later Stata sessions.
set more off, permanently

***********************************************************
* Data available at the website of the Brazilian National Institute of Geography and Statistics 
*	https://www.ibge.gov.br/estatisticas/sociais/trabalho/9662-censo-demografico-2010.html?=&t=microdados
* Data has to be downloaded for each state.

* WARNING! Check the code to ensure you create respective sub-folders or correct the paths and spelling depending on the downloaded folder structure. For this study, I first downloaded the data in 2013 so folder structures might have changed. 

** WARNING! This dataset is very large so that the code proceeds in steps by first importing data from each state and only variables required for later. Then these sub-sets of data are appended into five datasets, one for each region. New variables are created using these sub-sets before they are then finally merged to a complete data set. 

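	*INPUTS (household files; for every state listed, the matching person file Pessoas/Amostra_Pessoas_XX.txt is read as well):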
		*Amostra_Domicilios_11.txt
		*Amostra_Domicilios_12.txt
		*Amostra_Domicilios_13.txt
		*Amostra_Domicilios_14.txt
		*Amostra_Domicilios_15.txt
		*Amostra_Domicilios_16.txt
		*Amostra_Domicilios_17.txt
		*Amostra_Domicilios_21.txt
		*Amostra_Domicilios_22.txt
		*Amostra_Domicilios_23.txt
		*Amostra_Domicilios_24.txt
		*Amostra_Domicilios_25.txt
		*Amostra_Domicilios_26.txt
		*Amostra_Domicilios_27.txt
		*Amostra_Domicilios_28.txt
		*Amostra_Domicilios_29.txt
		*Amostra_Domicilios_31.txt
		*Amostra_Domicilios_32.txt
		*Amostra_Domicilios_33.txt
		*Amostra_Domicilios_34.txt
		*Amostra_Domicilios_35.txt
		*Amostra_Domicilios_41.txt
		*Amostra_Domicilios_42.txt
		*Amostra_Domicilios_43.txt
		*Amostra_Domicilios_50.txt
		*Amostra_Domicilios_51.txt
		*Amostra_Domicilios_52.txt
		*Amostra_Domicilios_53.txt
		
	*OUTPUTS:
		*"CENSO 2010_wa.dta"

***********************************************************

// Import the Census 2010 sample microdata state by state.
//
// For every state code the fixed-width household file (Domicilios) and person
// file (Pessoas) are read with -infix-, the household variables are attached
// to each person through the household ID (hhid), and the result is saved as
// "data by states/<code>_all".
//
// The column layouts are identical for all states, so they are stored once in
// local macros and reused. Because locals are lost when single lines are
// executed interactively, run this section as a whole.

// State codes; the first digit is the region (1 to 5).
// NOTE(review): 34 has no entry in the state labels defined later in this
// file. Its result is combined with 35 right after the import, so it is
// presumably a second file belonging to state 35 - confirm against the
// downloaded folder structure.
local states 11 12 13 14 15 16 17 ///
	21 22 23 24 25 26 27 28 29 ///
	31 32 33 34 35 ///
	41 42 43 ///
	50 51 52 53

// Sub-folder names inside each state folder. The household folder name was
// stored as "Domic’lios" (a wrongly decoded "í"); it is restored here.
// Adjust both names if your folder structure differs.
local domdir "Domicílios"
local pesdir "Pessoas"

// Column layout of the household file.
// The 16-digit weight is read as double: the -infix- default (float) cannot
// hold that many digits.
local domspec ///
	uf 1-2 str municip 1-7 str ponder 8-20 str hhid 21-28 double hhweight 29-44 ///
	region 45 mesoreg 46-47 microreg 48-50 metrop 51-52 area 53 hhoccup 54-55 ///
	housetype 56-57 ownership 58 rent_v 59-64 rent_mw 65-73 walls 74 rooms 75-76 ///
	density 77-79 bedroom 80-81 density_bdr 82-84 bath 85-85 toilet 86 sanitation 87 ///
	water 88-89 canaliz 90 garbage 91 electricity 92 elect_meter 93 radio 94 tv 95 ///
	wash 96 fridge 97 mobile 98 tel 99 laptop 100 pc_internet 101 motobike 102 ///
	car 103 emigrant 104 inhabs 105-106 hhead 107 death 108 hhinc_v 109-115 ///
	hhinc_mw 116-125 hhinc_pc_v 126-133 hhinc_pc_mw 134-142 hhtype 143 adequate 144

// Column layout of the person file (weight read as double, see above).
// The misspelled names ocucpation and work_emplyees are kept because later
// code refers to them.
local pesspec ///
	str hhid 21-28 double iweight 29-44 hhead_rel 54-55 orderid 56-57 sex 58 age_ym 59-61 ///
	race 68 birthreg 69 defic_see 70 defic_hear 71 defic_walk 72 defic_mental 73 ///
	born_MC 74 born_UF 75 nationality 76 residencyr 77-80 birth_UF_ext 81 ///
	birth_UF 82-88 birth_ext 89-95 time_UF 96-98 time_MC 99-101 origin 102 ///
	origin_UF 103-109 origin_MC 110-116 origin_ext 117-123 reside_five 124 ///
	reside_UF 125-131 reside_MC 132-138 reside_ext 139-145 alphabet 146 school 147 ///
	school_course 148-149 educ_high 154-155 educ_high_grad 156 educ_level 158 ///
	educ_place 168 educ_UF 169-175 educ_MC 176-182 educ_ext 183-189 ///
	partner_live 190 partner_id 191-192 partner_type 193 marital 194 ///
	work_wk 195 work_wk_no 196 work_wk_hh 197 work_wk_subs 198 work_no 199 ///
	ocucpation 200-203 activity 204-208 work_position 209 work_emplyees 210 ///
	insurance_all 211 inc_mth_type_main 212 inc_mth_v_main 213-218 inc_v_main 219-224 ///
	inc_mw_main 225-230 inc_mth_typesec 231 inc_mth_vsec 232-237 inc_mw_sec 238-246 ///
	inc_vall 247-253 inc_mwall 254-262 inc_mth_vtot 263-269 inc_mth_mwtot 270-278 ///
	hhincpc_july_v 296-303 hhincpc_july_mw 304-312 work_hrs 313-315 work_search 316 ///
	work_avail 317 inc_pension 318 inc_bolsa 319 inc_social 320 inc_other 321 ///
	inc_nowork_v 322-327 work_place 328 work_UF 329-335 work_MC 336-342 ///
	work_ext 343-349 work_pendel 350 work_traffic 351 work_active 391 ///
	work_emp_main 392 work_emp_sec 393 work_stat_main 394 work_stat_sec 395 ///
	religion 396-398 family_id 402-403 family_memb 404-405 famincpc_v 406-413 ///
	famincpc_mw 414-422 insurance_main 432 insurance_all_sec 433 hh_type 434 ///
	family_indic 435 family_comp 436 family_comp_sec 437

// One temporary file is enough: it is overwritten for every state.
tempfile dom

foreach st of local states {
	// Household records of this state
	infix `domspec' using "`st'/`domdir'/Amostra_Domicilios_`st'.txt", clear
	sort hhid
	save "`dom'", replace

	// Person records of this state; many persons share one household
	infix `pesspec' using "`st'/`pesdir'/Amostra_Pessoas_`st'.txt", clear
	sort hhid
	merge m:1 hhid using "`dom'"
	drop _merge
	save "data by states/`st'_all", replace
}

// Combine the two files 34 and 35 into one.
// The files are stacked with -append-: an m:m merge on hhid would pair rows
// arbitrarily wherever an ID occurred in both files and would leave a stray
// _merge variable in the saved data.
use "data by states/34_all", clear
append using "data by states/35_all"
drop if hhid==""
save "data by states/34-5_all", replace
cd "/Volumes/INTENSO/Data/Censo 2010"

// Stack the state-level files into one file per region.
// Each region starts from one file and the remaining files are appended
// one after the other in the order listed.
local statedir "data by states"

// Region 1: states 11 to 17
use "`statedir'/11_all", clear
foreach code of numlist 12/17 {
	append using "`statedir'/`code'_all"
}
save "`statedir'/1_all", replace

// Region 2: states 21 to 29
use "`statedir'/21_all", clear
foreach code of numlist 22/29 {
	append using "`statedir'/`code'_all"
}
save "`statedir'/2_all", replace

// Region 3: the combined 34-5 file first, then states 31 to 33
use "`statedir'/34-5_all", clear
foreach code of numlist 31/33 {
	append using "`statedir'/`code'_all"
}
save "`statedir'/3_all", replace

// Region 4: states 41 to 43
use "`statedir'/41_all", clear
foreach code of numlist 42/43 {
	append using "`statedir'/`code'_all"
}
save "`statedir'/4_all", replace

// Region 5: states 50 to 53
use "`statedir'/50_all", clear
foreach code of numlist 51/53 {
	append using "`statedir'/`code'_all"
}
save "`statedir'/5_all", replace

// Compress each regional dataset in place to reduce its size on disk

foreach reg of numlist 1/5 {
	local regfile "data by states/`reg'_all"
	use "`regfile'", clear
	compress
	save "`regfile'", replace
}


///Prepping variables///
forv r=1/5{
	* Load the combined file of region `r' from the folder in which the
	* append and compress steps above saved it.
	use "data by states/`r'_all", clear

* Age in years: raw codes of 900 and above are recoded to 0 (labelled below as
* "younger than 1 year"). Missing values are excluded from the recode because
* Stata ranks missing above every number and would otherwise turn them into 0.
gen age=age_ym
replace age=0 if age>=900 & !missing(age)
drop age_ym
//labels and categories//
label variable hhid "Household ID"
label variable iweight "Individual weight"
* The weight field is read as an integer; dividing by 10^13 puts the decimal point back
replace iweight=(iweight/10000000000000)
format %18.13g iweight
label variable hhead_rel "Relation to household head"
label define hhead_rel 1 "Hh head" 2 "Partner hetero" 3 "Partner homo" 4 "Child" 5 "Child only hh head" 6 "Stepchild" 7 "Son/daughter-in-law" 8 "Parent" 9 "Parent-in-law" 10 "Grandchild" 11 "Great grandchild" 12 "Sibling" 13 "Grandparent" 14 "Relative" 15 "Family member" 16 "House mate" 17 "Pensioner" 18 "Domestic worker" 19 "Relative of domestic worker"
label value hhead_rel hhead_rel
label variable orderid "Number of order"
label variable sex "Sex"
label define sex 1 "Male" 2 "Female"
* Attach the value label (it was defined but never assigned to the variable)
label value sex sex
label variable age "Age in years. 0 = younger than 1 year"
label variable race "Race"
label define race 1 "White" 2 "Black" 3 "Asian" 4 "Brown" 5 "Indigenous" 9 "Ignored"
label value race race
* Dummy for white; undefined when race is missing or coded 9 ("Ignored").
* The two cases are alternatives, so they are joined with "or".
gen white=cond(race==1, 1, 0)
replace white=. if missing(race) | race==9
label variable white "White"
label define white 0 "Non-white" 1 "White"
label value white white
label variable birthreg "Registration of birth"
label define birthreg 1 "Card" 2 "Hospital registration" 3 "Administrative registry for indigenous births RANI" 4 "none" 5 "doesnt know" 9 "ignored"
label value birthreg birthreg
label variable defic_see "Deficiency of sight"
label define defic_see 1 "Complete" 2 "Great" 3 "Some" 4 "None" 9 "Ignored"
label value defic_see defic_see
label variable defic_hear "Deficiency of hearing"
label define defic_hear 1 "Complete" 2 "Great" 3 "Some" 4 "None" 9 "Ignored"
label value defic_hear defic_hear
label variable defic_walk "Deficiency of walking"
label define defic_walk 1 "Complete" 2 "Great" 3 "Some" 4 "None" 9 "Ignored"
label value defic_walk defic_walk
label variable defic_mental "Mental deficiency"
label define defic_mental 1 "Yes" 2 "None"
label value defic_mental defic_mental
label variable born_MC "Born in this municipality"
label define born_MC 1 "Yes, always lived" 2 "Yes, but lived somewhere else" 3 "No"
label value born_MC born_MC
label variable born_UF "Born in this state"
label define born_UF 1 "Yes, always lived" 2 "Yes, but lived somewhere else" 3 "No"
label value born_UF born_UF
label variable nationality "Nationality"
label define nationality 1 "Brazilian born" 2 "Brazilian naturalized" 3 "Foreigner"
label value nationality nationality
* Birth place, previous residence and residence five years before the census.
* The special codes of the state variables are set to missing with inlist():
* the original conditions chained the codes with "&", which can never be true
* for a single value, so nothing was ever recoded.
label variable residencyr "Year of naturalisation"
label variable birth_UF_ext "Born in other state or exterior"
label define birth_UF_ext 1 "Other state" 2 "Exterior"
label value birth_UF_ext birth_UF_ext
label variable birth_UF "State of birth"
label define birth_UF 1100000 "Rôndonia" 1200000 "Acre" 1300000 "Amazonas" 1400000 "Roraima" 1500000 "Pará" 1600000 "Amapa" 1700000 "Tocantis" 2100000 "Maranhão" 2200000 "Piauí" 2300000 "Ceará" 2400000 "Rio Grande do Norte" 2500000 "Paraíba" 2600000 "Pernambuco" 2700000 "Alagoas" 2800000 "Sergipe" 2900000 "Bahia" 3100000 "Minas Gerais" 3200000 "Espírito Santo" 3300000 "Rio de Janeiro" 3500000 "São Paulo" 4100000 "Paraná" 4200000 "Santa Catarina" 4300000 "Rio Grande do Sul" 5000000 "Mato Grosso do Sul" 5100000 "Mato Grosso" 5200000 "Goiás" 5300000 "Distrito Federal"
replace birth_UF=. if inlist(birth_UF, 8888888, 9900000)
label value birth_UF birth_UF
label variable birth_ext "Country of birth"
label variable time_UF "Time of residency in state in years"
label variable time_MC "Time of residency in municipalities in years"
label variable origin "Location of anterior residency"
label define origin 1 "Brazil" 2 "Foreign country"
label value origin origin
label variable origin_UF "State of anterior residency"
* Same state list as birth_UF, kept under its own label name
label copy birth_UF origin_UF
replace origin_UF=. if inlist(origin_UF, 8888888, 9899999, 9900000)
label value origin_UF origin_UF
label variable origin_MC "Municipality of anterior residency"
label variable origin_ext "Country of anterior residency"
label variable reside_five "Location of residency on 31/07/2005"
label define reside_five 1 "Brazil" 2 "Foreign country"
label value reside_five reside_five
label variable reside_UF "State of residency on 31/07/2005"
* Same state list as birth_UF, kept under its own label name
label copy birth_UF reside_UF
replace reside_UF=. if inlist(reside_UF, 8888888, 9899999, 9900000)
label value reside_UF reside_UF
label variable reside_MC "Municipality of residency on 31/07/2005"
label variable reside_ext "Country of residency on 31/07/2005" 
* Education variables.
* The garbled label text "Cr?che" is restored to "Crèche".
label variable alphabet "Alphabetised"
label variable school "Frequenting school"
label define school 1 "Public" 2 "Private" 3 "No, but did" 4 "No, never"
label value school school
label variable school_course "Course frequented"
label define school_course 1 "Crèche" 2 "Kindergarden" 3 "CA" 4 "Alphabetisation for adults" 5 "Primary school" 6 "EJA primary" 7 "Middle school" 8 "EJA middle" 9 "Graduate" 10 "Specialisation" 11 "Master" 12 "PhD"
label value school_course school_course
label variable educ_high "Highest course frequented"
label define educ_high 1 "Crèche" 2 "Alphabetisation for adults" 3 "Elementary" 4 "Middle school 1st" 5 "1st to 3rd grade" 6 "4th grade" 7 "5th to 8th grade" 8 "Supplementary to fundamental education" 9 "Middle school 2nd" 10 "Supplementary to middle school" 11 "Graduate" 12 "Specialisation" 13 "Master" 14 "PhD"
label value educ_high educ_high
label variable educ_high_grad "Conclusion of course"
label define educ_high_grad 1 "Yes" 2 "No"
label value educ_high_grad educ_high_grad
label variable educ_level "Level of education"
label define educ_level 1 "Without or fundamental incomplete" 2 "Fundamental completed or middle incomplete" 3 "Middle completed or higher incomplete" 4 "Higher completed"
* Code 5 has no label and is set to missing
replace educ_level=. if educ_level==5
label value educ_level educ_level
label variable educ_place "Location of education"
label define educ_place 1 "This municipality" 2 "Other municipality" 3 "Abroad"
label value educ_place educ_place
label variable educ_UF "State of education"
label define educ_UF 1100000 "Rôndonia" 1200000 "Acre" 1300000 "Amazonas" 1400000 "Roraima" 1500000 "Pará" 1600000 "Amapa" 1700000 "Tocantis" 2100000 "Maranhão" 2200000 "Piauí" 2300000 "Ceará" 2400000 "Rio Grande do Norte" 2500000 "Paraíba" 2600000 "Pernambuco" 2700000 "Alagoas" 2800000 "Sergipe" 2900000 "Bahia" 3100000 "Minas Gerais" 3200000 "Espírito Santo" 3300000 "Rio de Janeiro" 3500000 "São Paulo" 4100000 "Paraná" 4200000 "Santa Catarina" 4300000 "Rio Grande do Sul" 5000000 "Mato Grosso do Sul" 5100000 "Mato Grosso" 5200000 "Goiás" 5300000 "Distrito Federal"
* Set the special codes to missing. The original condition chained them with
* "&", which can never be true for a single value.
replace educ_UF=. if inlist(educ_UF, 8888888, 9899999, 9900000)
label value educ_UF educ_UF
label variable educ_MC "Municipality of education"
label variable educ_ext "Country of education"
* --- Partnership, marital status and job characteristics ---

* Correct the misspelled variable name created at import
rename ocucpation occupation

* Variable labels
label var partner_live "Living with partner"
label var partner_id "Order ID of partner"
label var partner_type "Type of relationship"
label var marital "Marital status"
label var work_wk "Worked this week"
label var work_wk_no "Did not work but had job this week"
label var work_wk_hh "Worked for household member this week"
label var work_wk_subs "Worked this week in subsistence production"
label var work_no "Number of jobs"
label var occupation "Occupation"
label var activity "Activity"
label var work_position "Position in job"
label var work_emplyees "Number of employees"
label var insurance_all "Public insurance in any job"

* Value labels: the four "worked this week" items share the same Yes/No coding
foreach yn_var in work_wk work_wk_no work_wk_hh work_wk_subs {
	label define `yn_var' 1 "Yes" 2 "No"
	label values `yn_var' `yn_var'
}

* Value labels: remaining categorical variables
label define partner_live 1 "Yes" 2 "No, but did" 3 "No, never"
label values partner_live partner_live

label define partner_type 1 "Marriage religious and civil" 2 "Civil marriage" 3 "Religious marriage" 4 "Consensual union"
label values partner_type partner_type

label define marital 1 "Married" 2 "Seperated" 3 "Divorced" 4 "Widowed" 5 "Single"
label values marital marital

label define work_no 1 "One" 2 "Two or more"
label values work_no work_no

label define work_position 1 "Employed with signed card" 2 "Military/Police" 3 "Public employee" 4 "Employed without signed card" 5 "Self-employed" 6 "Employer" 7 "Non-remunerated"
label values work_position work_position

label define work_emplyees 1 "1 to 5" 2 "6 or more"
label values work_emplyees work_emplyees

label define insurance_all 1 "Yes, in main job" 2 "Yes, in other job" 3 "No"
label values insurance_all insurance_all
* --- Income, working hours, job search and transfer receipt ---

* Variable labels
label var inc_mth_type_main "Type of salary in main job"
label var inc_mth_v_main "Monthly income main job"
label var inc_v_main "Income main job excluding any payment in products, goods or benefits"
label var inc_mw_main "Income main job in minimum wages"
label var inc_mth_typesec "Type of salary in second job"
label var inc_mth_vsec "Monthly income second job"
label var inc_mw_sec "Income second job in minimum wages"
label var inc_vall "Income all jobs"
label var inc_mwall "Income all jobs in minimum wages"
label var inc_mth_vtot "Total income in July 2010 from all sources"
label var inc_mth_mwtot "Total income in July 2010 from all sources in minimum wages"
label var hhincpc_july_v "Hh income per capita in July 2010"
label var hhincpc_july_mw "Hh income per capita in July 2010 in minimum wages"
label var work_hrs "Weekly working hours main job"
label var work_search "Active job search"
label var work_avail "Work availability"
label var inc_pension "Public pension income"
label var inc_bolsa "Bolsa Familia/PETI income"
label var inc_social "Social protection income"
label var inc_other "Other income sources"
label var inc_nowork_v "Value of other incomes"

* Rescale the fields that were read as integers: divide by 100 ...
foreach amount in inc_mw_main hhincpc_july_v {
	replace `amount' = `amount'/100
}
format %5.2g inc_mw_main
* ... or by 100000
foreach amount in inc_mw_sec inc_mwall inc_mth_mwtot hhincpc_july_mw {
	replace `amount' = `amount'/100000
}

* Type of salary, same coding for main and second job
foreach paytype in inc_mth_type_main inc_mth_typesec {
	label define `paytype' 0 "None" 1 "Money, products or goods" 2 "Benefits"
	label values `paytype' `paytype'
}

* Job search and availability: Yes/No
foreach yn_var in work_search work_avail {
	label define `yn_var' 1 "Yes" 2 "No"
	label values `yn_var' `yn_var'
}

* Transfer receipt indicators: code 9 is set to missing
label define inc_pension 0 "No" 1 "Yes"
foreach transfer in inc_bolsa inc_social inc_other {
	label define `transfer' 0 "No" 1 "Yes" 9 "Ignored"
}
foreach transfer in inc_pension inc_bolsa inc_social inc_other {
	replace `transfer' = . if `transfer'==9
	label values `transfer' `transfer'
}
* Work place, commuting and employment status
label variable work_place "Location of work place"
label define work_place 1 "Own house" 2 "Municipality" 3 "Other municipality" 4 "Abroad" 5 "In more than one MC or country"
label value work_place work_place
label variable work_UF "State of work place"
label define work_UF 1100000 "Rôndonia" 1200000 "Acre" 1300000 "Amazonas" 1400000 "Roraima" 1500000 "Pará" 1600000 "Amapa" 1700000 "Tocantis" 2100000 "Maranhão" 2200000 "Piauí" 2300000 "Ceará" 2400000 "Rio Grande do Norte" 2500000 "Paraíba" 2600000 "Pernambuco" 2700000 "Alagoas" 2800000 "Sergipe" 2900000 "Bahia" 3100000 "Minas Gerais" 3200000 "Espírito Santo" 3300000 "Rio de Janeiro" 3500000 "São Paulo" 4100000 "Paraná" 4200000 "Santa Catarina" 4300000 "Rio Grande do Sul" 5000000 "Mato Grosso do Sul" 5100000 "Mato Grosso" 5200000 "Goiás" 5300000 "Distrito Federal"
* Set the special codes to missing. The original condition chained them with
* "&", which can never be true for a single value, so nothing was recoded.
replace work_UF=. if inlist(work_UF, 8888888, 9899999, 9900000)
label value work_UF work_UF
label variable work_MC "Municipality of work place"
label variable work_ext "Country of work place"
label variable work_pendel "Daily travel to work"
label variable work_traffic "Time spent travelling to work"
label define work_traffic 1 "Up to 5 mins" 2 "6 to 30 mins" 3 "1/2 to 1 hour" 4 "1 to 2 hours" 5 "2+ hours"
label value work_traffic work_traffic
label variable work_active "Economic activity"
label define work_active 1 "Active" 2 "Not active"
label value work_active work_active
label variable work_emp_main "Employment status 1"
label define work_emp_main 1 "Employed" 2 "Unemployed"
label value work_emp_main work_emp_main
label variable work_emp_sec "Employment status 2"
label define work_emp_sec 1 "Employed" 2 "Unemployed"
label value work_emp_sec work_emp_sec
label variable work_stat_main "Employment type 1"
label define work_stat_main 1 "Employed with signed card" 2 "Military and public employees" 3 "Employed without signed card" 4 "Self-employed" 5 "Employer" 6 "Non remunerated" 7 "Subsistence work"
label value work_stat_main work_stat_main
label variable work_stat_sec "Employment type 2"
label define work_stat_sec 1 "Domestic worker with signed card" 2 "Domestic worker without signed card" 3 "Other worker with signed card" 4 "Military and public employees" 5 "Other workers without signed card"
label value work_stat_sec work_stat_sec
* Religion, family identifiers, family income and family composition
label variable religion "Religion"
label variable family_id "Familiy ID number"
label variable family_memb "Number of family members"
label variable famincpc_v "Family income per capita"
* Income fields are read as integers; the divisions put the decimal point back
replace famincpc_v=(famincpc_v/100)
label variable famincpc_mw "Family income per capita in minimum wages"
replace famincpc_mw=(famincpc_mw/100000)
label variable insurance_main "Public insurance contributor main job"
label variable insurance_all_sec "Public insurance contributor all other jobs"
label variable hh_type "Hh type"
label define hh_type 1 "One person" 2 "Two or more unrelated persons" 3 "Two or more related persons"
label value hh_type hh_type
label variable family_indic "Family arrangement"
label define family_indic 1 "Family arrangement" 2 "No family arrangement"
* The label name was missing here, so the label was never attached
label value family_indic family_indic
label variable family_comp "Family composition"
label define family_comp 1 "Couple w/o children" 2 "Couple w/o children with relatives" 3 "Couple with children" 4 "Couple with children and relatives" 5 "Single mother with children" 6 "Single mother with children and relatives" 7 "Single father with children" 8 "Single father with children and relatives" 9 "Others"
label value family_comp family_comp
label variable family_comp_sec "Composition of second family"
label define family_comp_sec 1 "Couple w/o children" 2 "Couple with children" 3 "Single mother with children"
* The label name was misspelled (family_com_sec), which pointed the variable
* at a label that does not exist
label value family_comp_sec family_comp_sec
label variable uf "State"
label define uf 11 "Rôndonia" 12 "Acre" 13 "Amazonas" 14 "Roraima" 15 "Pará" 16 "Amapa" 17 "Tocantis" 21 "Maranhão" 22 "Piauí" 23 "Ceará" 24 "Rio Grande do Norte" 25 "Paraíba" 26 "Pernambuco" 27 "Alagoas" 28 "Sergipe" 29 "Bahia" 31 "Minas Gerais" 32 "Espírito Santo" 33 "Rio de Janeiro" 35 "São Paulo" 41 "Paraná" 42 "Santa Catarina" 43 "Rio Grande do Sul" 50 "Mato Grosso do Sul" 51 "Mato Grosso" 52 "Goiás" 53 "Distrito Federal"
* Geography identifiers: variable labels and the region value label.
label var municip "Municipality"
label var ponder "Weighting area"
label var mesoreg "Mesoregion"
label var microreg "Microregion"
label var metrop "Metropolitan area"
label var region "Region"
label define region 1 "North" 2 "Northeast" 3 "Southeast" 4 "South" 5 "Centralwest"
label values region region

* Household weight: divide the stored value by 10^13 and widen the display format.
label var hhweight "Hh weight"
replace hhweight = hhweight/1e13
format hhweight %18.13g

* Area: turn the 1/2 coding into a 1/0 dummy (code 2 becomes 0).
recode area (2=0)
label var area "Area"
label define area 1 "urban" 0 "rural"
label values area area
* Housing characteristics: labels first, then rescaling of the stored values.
label var hhoccup "Hh occupation"
label define hhoccup 1 "Permanently occupied" 2 "Permanently occupied no interview" 5 "improvised occupation" 6 "Collective hh with inhabitant"
label values hhoccup hhoccup
label var housetype "Type of housing"
label define housetype 11 "House" 12 "Townhouse/condominion" 13 "Flat" 14 "Hut" 15 "Bad hut" 51 "Tent" 52 "In establishment" 53 "Others (vagon, trailor etc.)" 61 "Asylum, orphanage" 62 "Hotel, pension" 63 "Workers' housing" 64 "Prison" 65 "Other"
label values housetype housetype
label var ownership "Ownership status"
label var rent_v "Rent"
label var rent_mw "Rent in minimum wages"
label var walls "Wall material"
label var rooms "Rooms"
label var density "Inhabitant/room"
label var bedroom "Bedrooms"
label var density_bdr "Inhabitant/bedroom"

* Rent in minimum wages is divided by 100000.
replace rent_mw = rent_mw/100000
format rent_mw %6.5g

* Both density measures are divided by 10.
foreach dens_var in density density_bdr {
	replace `dens_var' = `dens_var'/10
	format `dens_var' %6.1g
}
* Dwelling services
label var bath "Exclusive bathrooms"
label var toilet "Toilet"
label var sanitation "Type of sanitation"
label var water "Water supply"
label var canaliz "Canalisation"
label var garbage "Waste disposal"
label var electricity "Electricity"
label var elect_meter "Electricity meter"

* Durable goods
label var radio "Radio"
label var tv "TV"
label var wash "Washing mashine"
label var fridge "Fridge"
label var mobile "Mobile phone"
label var tel "Landline"
label var laptop "PC"
label var pc_internet "PC with internet"
label var motobike "Motorbike"
label var car "Car"

* Household composition
label var emigrant "Emigrant"
label var inhabs "Inhabitants"
label var hhead "Household responisbility"
label var death "Death in household"
label var hhtype "Type of household"
label var adequate "Adequacy of housing"

* Household income: labels, then rescaling of the stored values
label var hhinc_v "Monthly hh income"
label var hhinc_mw "Monthly hh income in minimum wages"
label var hhinc_pc_v "Hh income p/c"
label var hhinc_pc_mw "Hh income p/c in minimum wages"
* incomes expressed in minimum wages are divided by 100000
foreach mw_var in hhinc_mw hhinc_pc_mw {
	replace `mw_var' = `mw_var'/100000
}
* per-capita income is divided by 100
replace hhinc_pc_v = hhinc_pc_v/100
* Value labels for the housing and household variables.
label define ownership 1 "Own house - fully paid" 2 "Own house - paying off" 3 "Rented" 4 "Lend by employer" 5 "Lend other" 6 "Other"
* The label name must be given: without it the command detaches any value
* label from ownership instead of attaching the one defined above.
label value ownership ownership
label define walls 1 "Bricks coated" 2 "Bricks not coated" 3 "Wood" 4 "Plaster coated" 5 "Plaster not coated" 6 "Wood unprepared" 7 "Straw" 8 "Others" 9 "No wall"
label value walls walls
* bath: only the top code gets a text label
label define bath 9 "9 or more"
label value bath bath
label define sanitation 1 "General sanitation network" 2 "Septic sump" 3 "Rudimentary Sump" 4 "Ditch" 5 "River, lake or sea" 6 "Other"
label value sanitation sanitation
label define water 1 "General distribution network" 2 "Well on property" 3 "Well outside property" 4 "Carro-pipa" 5 "Rain water cisterne" 6 "Rain water other" 7 "Rivers, lakes etc." 8 "Other" 9 "Well in village" 10 "Well outside village"
label value water water
label define canaliz 1 "Yes, in min. 1 room" 2 "Yes, only on the property" 3 "No"
label value canaliz canaliz
label define garbage 1 "Collected directly" 2 "Collected in collective" 3 "Burnt" 4 "Buried" 5 "Tossed in public area" 6 "Tossed in river, lake or sea" 7 "Other"
label value garbage garbage
label define electricity 1 "Yes by company" 2 "Yes, other" 3 "No electricity"
label value electricity electricity
label define elect_meter 1 "Yes, exclusive" 2 "Yes, common" 3 "No meter"
label value elect_meter elect_meter
label define hhead 1 "One" 2 "More" 3 "Ignored"
label value hhead hhead
label define hhtype 1 "Unipersonal" 2 "Nuclear" 3 "Extended" 4 "Composed"
label value hhtype hhtype
label define adequate 1 "Adequate" 2 "Semi-adequate" 3 "Inadequate"
label value adequate adequate

***CHECK values of insurance_main and insurance_all_sec
* Yes/no indicators: code 2 becomes 0, then each variable gets its own
* value label (named after the variable) with 1 "Yes" and 0 "No".
local zeros alphabet insurance_main insurance_all_sec toilet death radio tv wash fridge mobile tel laptop pc_internet motobike car emigrant work_pendel
foreach v of local zeros {
	replace `v' = 0 if `v'==2
	label define `v' 1 "Yes" 0 "No"
	label values `v' `v'
}

* Dummy: 1 if partner_live equals 1, 0 otherwise (a missing partner_live also gives 0).
gen d_partner = (partner_live==1)

//for correct identification of meso- and microregions//
* Prefix the within-state codes with the state code so they are unique nationwide.
gen mregion = uf*100 + mesoreg
gen micregion = uf*1000 + microreg


//id for each metropolitan microregion//
* regmetrop_id is 0 outside the listed microregions. Inside the loop every listed
* micregion code (state code * 1000 + microregion) first receives its position in
* the list (1 to 29).
gen regmetrop_id=0
local c 1
foreach id in 13007 15007 21002 23016 24018 25022 26017 27011 29021 29031 31030 32009 ///
				33018 35014 35032 35046 35050 35057 35058 35059 35060 35061 35062 35063 ///
				41037 43026 52010 52012 53001{
	replace regmetrop_id=`c' if micregion==`id'
	local ++c
}
* Collapse the positions into the 22 labelled areas: positions 18-24 (codes 35057 to
* 35063) all become 18, positions 28-29 (52012 and 53001) become 22, and the
* positions in between are renumbered accordingly.
* FIX: the recode and the label definition referred to regmetrop_id_ori, a name that
* is not created in this part of the file, while the label attached below is called
* regmetrop_id. Both now use regmetrop_id, so the ids are collapsed and labelled.
recode regmetrop_id (19=18) (20=18) (21=18) (22=18) (23=18) (24=18) (25=19) (26=20) (27=21) (28=22) (29=22)
label define regmetrop_id 1 "Manaus" 2 "Belém" 3 "São Luís" 4 "Fortaleza" 5 "Natal" ///
	6 "João Pessoa" 7 "Recife" 8 "Maceió" 9 "Salvador" 10 "Ilhéus-Itabuna" 11 "Belo Horizonte" ///
	12 "Vitória" 13 "Rio de Janeiro" 14 "Ribeirão Preto" 15 "Campinas" 16 "Sorocaba" ///
	17 "São José dos Campos" 18 "São Paulo" 19 "Curitiba" 20 "Porto Alegre" ///
	21 "Goiânia" 22 "Brasília e entonrno"    
label value regmetrop_id regmetrop_id

label variable regmetrop_id "Name of metropolitan area"
* NOTE(review): regmetrop is not created in this part of the file; confirm that it
* exists at this point, otherwise the next line stops with an error.
label variable regmetrop "Dummy for metropolitan area"
* NOTE(review): mregion is built above as state code * 100 + mesoregion, so this
* label text looks inaccurate; left unchanged pending confirmation.
label variable mregion "ID metropolitan area"

// dummy for microregion with less than 500000 inhabitants
// dummy for microregion with less than 173494 inhabitants, the Median population by microregion
//
// NOTE(review): the two loops that stood here are disabled because they could not
// run as written and stopped the do-file:
//  - the micro500 list held unfinished range placeholders (an entry such as
//    12001-27010 is evaluated as a subtraction, and the last entry ended in a
//    dangling minus sign); in addition micro500 was generated inside the loop,
//    which fails on the second pass because the variable already exists;
//  - the microMedian list was empty, so that variable was never created.
// Neither variable is used in the rest of this file. The same two population
// thresholds are applied later through pop_size, after the microregion
// population data have been merged in.
// To restore the dummies, generate the variable as 0 once before the loop and,
// inside a loop over the complete list of micregion codes, replace it with 1
// where micregion equals the current code.

//log income//
* ln() of a missing value is missing, so no explicit missing filter is needed.
g lninc_pc = ln(hhinc_pc_v)
g lninc_mth_main = ln(inc_mth_v_main)

//create national poverty line dummies//
* 1 if per-capita income is at or below the line, else 0. A missing income compares
* as larger than any number and therefore yields 0.
gen poor = (hhinc_pc_v<=255)
gen sevpoor = (hhinc_pc_v<=127.50)

* Higher lines for four states: state code, poverty line, severe poverty line.
local pov_uf 33 43 35 41
local pov_line 277.66 273.29 280 331.5
local pov_sev 138.83 136.64 140 165.75
forvalues pov_k = 1/4 {
	local pov_state : word `pov_k' of `pov_uf'
	local pov_hi : word `pov_k' of `pov_line'
	local pov_lo : word `pov_k' of `pov_sev'
	replace poor = 1 if hhinc_pc_v<=`pov_hi' & uf==`pov_state'
	replace sevpoor = 1 if hhinc_pc_v<=`pov_lo' & uf==`pov_state'
}

//compute proportion of children <14yrs in hh instead of total number//
* Step 1: on the rows of members younger than 14, store how many such members
*         (with a non-missing orderid) the household has.
bysort hhid: egen children = count(orderid) if age<14
* Step 2: members aged 14 or more carry a zero so that the household maximum below
*         is 0 when nobody is younger than 14.
replace children = 0 if age>=14 & age!=.
* Step 3: spread the household count to every member and divide by household size.
by hhid: egen kids = max(children)
gen propchild = kids/inhabs
drop children kids

* Recode sex from 1/2 to 0/1 and replace its value label (0 male, 1 female).
recode sex (1=0) (2=1)

label drop sex
label define sex 1 "female" 0 "male"
label values sex sex

//this will sum up all females in the household//
* women: number of female members, stored on the female rows only
bysort hhid: egen women = count(orderid) if sex==1
* male rows carry zero so that the household maximum is 0 when there are no females
replace women = 0 if sex==0
by hhid: egen women_ = max(women)
//with this and the number of hh members, we can now generate the variable for female proportion in hh//
gen females = women_/inhabs
drop women_ women

//generate proportion of elderly in the households//
* Men count as elderly from age 66, women from age 61. The counts are built
* separately by sex, spread to all household members, and then added up.

* elderly men per household
bysort hhid: egen elderlym = count(orderid) if sex==0 & age>=66 & age!=.
replace elderlym = 0 if sex==0 & age<=65
replace elderlym = 0 if sex==1
by hhid: egen old_male = max(elderlym)

* elderly women per household
by hhid: egen elderlyf = count(orderid) if sex==1 & age>=61 & age!=.
replace elderlyf = 0 if sex==1 & age<=60
replace elderlyf = 0 if sex==0
by hhid: egen old_female = max(elderlyf)

* share of elderly members in the household
gen elderly = (old_male+old_female)/inhabs
drop old_male old_female elderlyf elderlym

//generate a dummy indicating a female hh head//
* A row is flagged when sex times hhead_rel equals 1; the household-level dummy is
* the maximum of that flag over the household.
gen femhead_ = (sex*hhead_rel==1)
egen femhead = max(femhead_), by(hhid)
drop femhead_

* work_act: code 2 becomes 0
replace work_act = 0 if work_act==2

//cutting sample to working age population//
//minimum age for children is 16, but allowed 14 for apprenticeships//
* Household dummy: some member younger than 14 has work_act equal to 1. The flag is
* only defined on rows with age below 14.
gen childlab_ = (work_act==1) if age<14
egen childlab = max(childlab_), by(hhid)
drop childlab_

//I cut the age following the legal retirement age for men and women//
* Household dummy: some man older than 65 or some woman older than 60 has work_act
* equal to 1. Rows with a missing work_act do not enter the household maximum.
gen oldlab_ = (work_act==1 & ((age>65 & sex==0) | (age>60 & sex==1)))
replace oldlab_ = . if work_act==.
egen oldlab = max(oldlab_), by(hhid)
drop oldlab_

// URBAN POPULATION//
* MR_urban is the share of observations with area equal to 1 among all observations
* of the microregion that have a non-missing iid.
bysort micregion: egen toti_MR=count(iid) //all individual observations in Microregion
bysort micregion: egen MRurbant=total(area==1)
gen MR_urban=MRurbant/toti_MR
* FIX: the helper created above is called MRurbant; the original dropped MCurbant,
* a name that is not created in this part of the file, which stops the do-file.
drop MRurbant
* Keeps the original effect in case a variable named MCurbant is still in memory
* from earlier in the file; does nothing otherwise.
capture drop MCurbant

* NOTE(review): MC, MCrural and MCurban are not created in this part of the file;
* confirm that they exist at this point.
label variable MC "Observations in municip"
label variable MCrural "Ratio of rural areas within municip"
label variable MCurban "Ratio of urban areas within municip"
label variable poor "poor by national poverty line (1/2 of minimum wage)"
label variable sevpoor "severly poor by national poverty line (1/4 of minimum wage)"
label variable propchild "Proportion of children (<14 yrs) in hh"
label variable females "Proportion of female hh members"
label variable elderly "Proportion of old hh members (m>65, f>60)"
label variable femhead "Dummy female hh head"
label variable childlab "Dummy children (10-13 yrs) working"
label variable oldlab "Dummy elderly (m>65, f>60) working"
label variable lninc_pc "Ln(monthly hh income p/c)"
label variable lninc_mth_main "Ln(monthly income main job)"

//generate groups of occupation and activity//
* activ_group maps the activity code to 21 sector groups; code 0 is kept as the
* separate category 0 (not defined). Codes outside all listed ranges and missing
* codes stay missing.
gen activ_group=0 if activity==0
* FIX: the lower bound is required here. Without it the condition also matched
* activity equal to 0 and overwrote the category 0 assigned on the previous line
* with group 1, so category 0 could never occur.
replace activ_group=1 if activity>0 & activity<=3002
replace activ_group=2 if activity>=5000 & activity<=9000
replace activ_group=3 if activity>=10010 & activity<=33002
replace activ_group=4 if activity>=35010 & activity<=35022
replace activ_group=5 if activity>=36000 & activity<=39000
replace activ_group=6 if activity>=41000 & activity<=43999
replace activ_group=7 if activity>=45010 & activity<=48999
replace activ_group=8 if activity>=49010 & activity<=53002
replace activ_group=9 if activity>=55000 & activity<=56999
replace activ_group=10 if activity>=58000 & activity<=63000
replace activ_group=11 if activity>=64000 & activity<=66002
replace activ_group=12 if activity==68000
replace activ_group=13 if activity>=69000 & activity<=75000
replace activ_group=14 if activity>=77010 & activity<=82009
replace activ_group=15 if activity>=84011 & activity<=84999
replace activ_group=16 if activity>=85011 & activity<=85999
replace activ_group=17 if activity>=86001 & activity<=88000
replace activ_group=18 if activity>=90000 & activity<=93020
replace activ_group=19 if activity>=94010 & activity<=96090
replace activ_group=20 if activity==97000
replace activ_group=21 if activity==99000

label variable activ_group "Sector of activity"
label define activ_group 0 "not defined" 1 "Agriculture" 2 "Extractive industry" 3 "Processing industry" 4 "Electricity, gas" ///
5 "Sanitation and sewage" 6 "Construction" 7 "Commerce" 8 "Transport" 9 "Housing and food" 10 "Information and communication" ///
11 "Financial services" 12 "Real estate" 13 "Professional consulting, science and technology" 14 "Administration and service" ///
15 "Public administration, security" 16 "Education" 17 "Health and social services" 18 "Arts, culture, sports" 19 "Other services" ///
20 "Domestic service" 21 "International organisations, foreign institurions"
label value activ_group activ_group

* sector: six selected activity groups get codes 1 to 6 in the order listed below,
* every other group is 0, and an undefined (0) or missing activ_group is missing.
gen sector = 0
local sector_code 0
foreach grp in 1 3 6 7 16 20 {
	local ++sector_code
	replace sector = `sector_code' if activ_group==`grp'
}
replace sector = . if activ_group==0 | activ_group==.
label variable sector "Sector"
label define sector 1 "Agriculture" 2 "Processing industry" 3 "Construction" 4"Commerce" 5 "Education" 6 "Domestic service" 0 "Others"
label values sector sector

* occu_group maps the occupation code to major groups: 0 for code 0, 1 to 9 for the
* four-digit ranges below, 10 for codes 110 to 999. All other codes stay missing.
gen occu_group = 0 if occupation==0
replace occu_group = 1 if inrange(occupation, 1111, 1439)
replace occu_group = 2 if inrange(occupation, 2111, 2659)
replace occu_group = 3 if inrange(occupation, 3111, 3522)
replace occu_group = 4 if inrange(occupation, 4110, 4419)
replace occu_group = 5 if inrange(occupation, 5111, 5419)
replace occu_group = 6 if inrange(occupation, 6111, 6225)
replace occu_group = 7 if inrange(occupation, 7111, 7549)
replace occu_group = 8 if inrange(occupation, 8111, 8350)
replace occu_group = 9 if inrange(occupation, 9111, 9629)
replace occu_group = 10 if inrange(occupation, 110, 999)

label variable occu_group "Occupation Type"
label define occu_group 0 "not defined" 1 "Director/Manager" 2 "Professional scientist, intellectual" 3 "Technician, professional medium level" ///
4 "Administrative assistant" 5 "Service, sales man" 6 "Qualified agricultural worker" 7 "Professional construction workers" ///
8 "Machine operator, mechanic" 9 "Elementary occupation" 10 "Army, police, firemen" 
label values occu_group occu_group

*recipient of other incomes
* code 9 is set to missing in each of the four income-source indicators
foreach src in pension bolsa social other {
	replace inc_`src' = . if inc_`src'==9
}

* household totals of each indicator, and their sum
foreach src in pension bolsa social other {
	egen tot_`src' = total(inc_`src'), by(hhid)
}
g tot_allother = tot_pension+tot_bolsa+tot_social+tot_other

label variable tot_bolsa "Bolsa"
label variable tot_pension "Pension"
label variable tot_social "Social"
label variable tot_other "Other"
label variable tot_allother "Anyother"

*time in metropolitan area
* time groups time_MC: the values 0 to 5 are kept as they are, 6 to 10 become 6,
* and 11 or more becomes 7. Any other value, including missing, stays missing.
gen time = time_MC if inlist(time_MC, 0, 1, 2, 3, 4, 5)
replace time = 6 if time_MC>=6 & time_MC<=10
replace time = 7 if time_MC>=11 & time_MC!=.
label variable time "Years in current place"
label define time 0 "<1yr" 1 "1yr" 2 "2yrs" 3 "3yrs" 4 "4yrs" 5 "5yrs" 6 "6-10yrs" 7 "11yrs or more"
label values time time

* Household dummies: 1 when the household total of the income-source indicator is
* different from zero, 0 otherwise.
g d_tot_pension=(tot_pension!=0)
g d_tot_bolsa=(tot_bolsa!=0)
g d_tot_social=(tot_social!=0)
g d_tot_other=(tot_other!=0)

* code 2 becomes 0
replace work_no=0 if work_no==2
replace work_emp_main=0 if work_emp_main==2

* Share of household members with work_active equal to 1.
* FIX: the temporary variable is now referenced through its macro. The original
* declared the tempvar but then created a permanent variable literally named
* t_worker, which stayed in the saved data.
* NOTE(review): this uses work_active, while the recode earlier in the file is
* applied to work_act; confirm which variable is intended.
tempvar t_worker
egen `t_worker'=total(work_active==1), by(hhid)
g prop_worker=`t_worker'/inhabs

** dummy whether partner employed or not
* partnerwork_ flags rows with work_emp_main equal to 1 and hhead_rel equal to 2 or 3
* (presumably the spouse or partner codes - confirm against the codebook).
bysort hhid: gen partnerwork_=cond(work_emp_main==1 & (hhead_rel==2 | hhead_rel==3), 1, 0)
* FIX: count() is an egen function and cannot be used with gen, so the original
* line stopped with an error. A count would not give a dummy either, because the
* flag is never missing. The household maximum is 1 when any member is flagged.
bysort hhid: egen partnerwork=max(partnerwork_)

//proportion of rent of hhincome
* A missing rent or a missing income gives a missing ratio by itself.
gen p_rentinc = rent_v/hhinc_v
label var p_rentinc "Proportion of rent over hh income"

// define great regions of origin
* origin_reg is taken from the leading digits of origin_UF. Regions 1 to 4 use an
* exclusive lower bound of region number * 1000000, region 5 an inclusive one.
g origin_reg = .
local reg_no 0
foreach upper in 1700000 2900000 3500000 4300000 {
	local ++reg_no
	replace origin_reg = `reg_no' if origin_UF>`reg_no'*1000000 & origin_UF<=`upper'
}
replace origin_reg = 5 if origin_UF>=5000000 & origin_UF<=5300000
* code 8888888: the region of residence is used
replace origin_reg = region if origin_UF==8888888
label values origin_reg region

* children
* kidshh: number of household members younger than 16 (with a non-missing orderid),
* spread to every member of the household.
	bysort hhid: egen childs = count(orderid) if age<16
	replace childs = 0 if age>=16 & age!=.
	by hhid: egen kidshh = max(childs)
* kidshh_d: dummy for at least one such member; kidshh_c: the count capped at 3.
* A missing kidshh compares as larger than any number, so it gives 1 and 3.
	g kidshh_d = (kidshh>=1)
	g kidshh_c = cond(kidshh>=3, 3, kidshh)

* write the data back to the file currently in use
save, replace

}

//merging ALL data//

* stack the five regional files
use "1_all", clear
forvalues reg = 2/5 {
	append using "`reg'_all"
}

* Keep ages 16 to 65 for sex equal to 0 and ages 16 to 60 for sex equal to 1.
* Rows with a missing age are dropped as well, because missing compares as larger
* than any number.
drop if (sex==0 & (age>65 | age<=15)) | (sex==1 & (age>60 | age<=15))
compress

save "CENSO 2010_wa", replace

* generate origin identifier for microregion
* Builds a lookup file with one row per municipality: state, region and microregion
* of that municipality, the population variables and the municipality codes, with
* the keys renamed so the file can be matched on the municipality of origin.
collapse (max) uf region micregion, by(municip) 
rename uf uf_ori
rename region region_ori
label value uf_ori uf
label value region_ori region
* add population data 
* FIX: m:1 instead of m:m. Several municipalities share a microregion, so the
* master side is many; the population files are expected to hold one row per key.
* With m:1 the merge stops with an error if that is not the case, whereas m:m
* pairs rows by their position within the key and silently gives arbitrary matches.
merge m:1 micregion using "MR_population"
drop _merge
drop MR UF
merge m:1 municip using "MC_population"
drop _merge

* keep only municipalities found in the code list
merge m:1 municip using "$data\MC_codes", keep(3) nogen
rename mc orig
rename municip origin_MC
ren micregion orig_mr

save "origin_MC", replace

* Lookup file: one row per municipality with the largest micregion value observed
* for it, with both variables renamed for matching on the place of residence.
use "CENSO 2010_wa", clear
collapse (max) reside_MR=micregion, by(municip)
rename municip reside_MC
save "reside_MC", replace

*****************************************************
*** INDIVIDUAL DATA ***

use "CENSO 2010_wa", clear

label var regmetrop_id "Name of metropolitan area"
label var regmetrop "Dummy for metropolitan area"
label var mregion "ID metropolitan area"
	
* marital status
* Regroup into three categories: the old codes 2, 3 and 4 (and 0) become 1, the old
* code 1 becomes 2 and the old code 5 becomes 3. The value label marital is then
* redefined under the same name.
	recode marital (0 2 3 4 = 1) (1 = 2) (5 = 3)
	label drop marital
	label define marital 1 "Separated, divorced, widowed" 2 "Married" 3 "Single"
* informality
* informal groups work_stat_main: codes 1 and 5 give 0, code 2 gives 1, codes 3, 6
* and 7 give 2, code 4 gives 3. Code 5 combined with work_emplyees equal to 1 is
* moved to 4. Every other value, including missing, stays missing.
	gen informal = .
	replace informal = 0 if inlist(work_stat_main, 1, 5)
	replace informal = 1 if work_stat_main==2
	replace informal = 2 if inlist(work_stat_main, 3, 6, 7)
	replace informal = 3 if work_stat_main==4
	replace informal = 4 if work_stat_main==5 & work_emplyees==1
	label define informal 0 "Formal private" 1 "Formal public" 2 "Informal" 3 "Self-employed" 4 "Small business"
	label values informal informal
	
	
** keep only necessary variables
* Same set of variables as before, grouped by theme; names that were listed twice
* appear once.
	local keep_ids hhid orderid iweight hhead_rel inhabs
	local keep_demog age sex white marital nationality femhead propchild ///
		kidshh kidshh_d kidshh_c
	local keep_educ school educ_level educ_high
	local keep_work work_hrs work_active work_emp_main work_stat_main work_emplyees ///
		work_place work_UF work_MC work_pendel work_traffic activ_group occupation ///
		occu_group prop_worker informal
	local keep_income inc_mth_v_main inc_vall inc_v_main inc_mth_vtot hhinc_v poor ///
		lninc_pc lninc_mth_main p_rentinc
	local keep_housing rent_v rent_mw housetype ownership rooms density adequate
	local keep_geo uf region area municip micregion mregion regmetrop regmetrop_id ///
		MC MCrural MCurban reside_*
	local keep_migr time_MC origin origin_MC origin_UF origin_reg born_MC born_UF
	keep `keep_ids' `keep_demog' `keep_educ' `keep_work' `keep_income' ///
		`keep_housing' `keep_geo' `keep_migr'
	
* residence municipality code as a string
tostring reside_MC, replace
	
* shorter names for the two household shares
rename propchild p_children
rename prop_worker p_worker
	
* add population data 
* FIX: m:1 instead of m:m. The master holds many persons per microregion and per
* municipality; the population files are expected to hold one row per key. With
* m:1 the merge stops with an error if that is not the case, whereas m:m pairs
* rows by their position within the key and silently gives arbitrary matches.
merge m:1 micregion using "MR_population"
drop _merge
drop MR UF
merge m:1 municip using "MC_population"
drop _merge 

* g population size categories for microregions based on distribution
format %10.0g population
g pop_size=1 if population<=173493  // median
replace pop_size=2 if population>173493 & population<=500000 // median to 90th %tile
replace pop_size=3 if population>500000 & population<=999999 // 90th to 95th
* FIX: exclude missing values. A missing population compares as larger than any
* number, so rows without population data were put into the top category.
replace pop_size=4 if population>=1000000 & !missing(population) // upper end
la define pop_size 1 "173493" 2 "173494-500000" 3 "500001-999999" 4 "1Mio"
la value pop_size pop_size	

drop population pop_urban

* municipality codes for the current municipality
merge m:1 municip using "$data\MC_codes", nogen
tostring origin_MC, replace
** migrant identification
* codes for the origin municipality; master rows without a match are kept
merge m:1 origin_MC using "$data\MC_origin_codes", keep(1 3) nogen

* dummy to identify anyone living in a different municipality than before	
* 1 when orig is non-missing and differs from mc, 0 otherwise
g migDummy = (orig!=mc & orig!=.)
* the two special national origin codes, and the two special codes within each
* state, are set to 0
replace migDummy = 0 if inlist(origin_MC, "8888888", "9999999")
foreach uf in 11 12 13 14 15 16 17 21 22 23 24 25 26 27 28 29 31 32 33 35 41 42 43 50 51 52 53 {
	replace migDummy = 0 if inlist(origin_MC, "`uf'88888", "`uf'99999")
}

*gen migrant dummy excluding returnees or international migrants
* 0 when born_MC equals 1 (non-migrants); 1 when born_MC equals 3, migDummy equals 1
* and time_MC is below 10 (non-returnees internal); missing for everyone else
gen migrant = cond(born_MC==1, 0, cond(born_MC==3 & migDummy==1 & time_MC<10, 1, .))
	
*gen migrant dummy for people moving between different MCs and by year of migration, use 3 migration periods
* migrant0, migrant1, migrant2: migrants with time_MC equal to 0, 1 and 2
forvalues t = 0/2 {
	g migrant`t' = (migrant==1 & time_MC==`t')
}	

* attach the origin and residence lookup files; master rows without a match are kept
merge m:1 origin_MC using "origin_MC", keep(1 3) nogen
merge m:1 reside_MC using "reside_MC", keep(1 3) nogen

* generate dummies for characteristics that will be collapsed to MC level
* Each dummy is 0 when the underlying variable is missing.
g nonwhite = (white==0)
g youth = inrange(age, 16, 25)
g dropouts = (inrange(age, 14, 18) & school==3)
* a zero main-job income is set to 1 so that its log is defined (and equals 0)
replace inc_mth_v_main = 1 if inc_mth_v_main==0
g lwage_ = ln(inc_mth_v_main)

* restricting data to relevant sub-sample
* only workers currently not in education (school equal to 3) and aged 16 to 65;
* rows with a missing age are dropped
	keep if school==3 & age>15 & age<=65


compress
save "CENSO 2010_ind_p2", replace

*****************************************************************************

*done

